%load_ext autoreload
%autoreload 2
from Tools import *
C:\Users\mario\anaconda3\envs\tesi\lib\site-packages\pyproj\__init__.py:91: UserWarning: Valid PROJ data directory not found. Either set the path using the environmental variable PROJ_LIB or with `pyproj.datadir.set_data_dir`. warnings.warn(str(err))
from matplotlib.collections import LineCollection
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.colors as colors
from glob import glob
import pysplit
import os
working_dir = f'C:\hysplit\working'
storage_dir = f'{os.getcwd()}{os.sep}data{os.sep}step_3_data_output{os.sep}traj'
meteo_dir = f'E:\\GDAS_DATA'
def run_hysplit(basename, years, months, days, hours, altitudes, location, runtime):
pysplit.generate_bulktraj(basename, working_dir, storage_dir, meteo_dir,
years, months, hours, altitudes, location, runtime,
monthslice=days, get_reverse=True,
get_clipped=False, hysplit="C:\\hysplit\\exec\\hyts_std")
def traj_data_from_path(traj_id):
filenames = glob(f"data/step_3_data_output/traj/{traj_id}*")
traj_group = pysplit.make_trajectorygroup(filenames)
# for traj in traj_group:
# altitude0 = traj.data.geometry.apply(lambda p: p.z)[0]
# traj.trajcolor = color_dict[altitude0]
traj_coord = np.concatenate([np.array(list(zip(x.path.xy[0], x.path.xy[1]))) for x in traj_group])
return {"group": traj_group, "coords": traj_coord}
def visualize_trajectories(axes, traj_group, traj_coord, direction = 'forward', lw=0.5):
# First graph, cropout and area
m1 = viz_init(axes[0])
m2 = viz_init(axes[1])
max_height = np.max([np.max(traj_group[x].data.geometry.z) for x in range(traj_group.trajcount)])
norm = plt.Normalize(0, max_height)
norm = colors.LogNorm(vmin=1, vmax=int(np.ceil(max_height)))
for traj in traj_group:
points = traj.data.geometry
z_values = points.z
points = m1(points.x, points.y)
segments = []
for i in range(len(points[0])-1):
point_0 = np.array((points[0][i], points[1][i]))
point_1 = np.array((points[0][i+1], points[1][i+1]))
segments.append(np.stack([point_0, point_1]))
segments = np.array(segments)
lc = LineCollection(segments, cmap='jet', norm=norm, zorder=4)
# Set the values used for colormapping
lc.set_array(z_values)
lc.set_linewidth(lw)
axes[0].add_collection(lc)
divider_1 = make_axes_locatable(axes[0])
cax_1 = divider_1.append_axes("right", size="5%", pad=0.05)
cb_1 = fig.colorbar(matplotlib.cm.ScalarMappable(norm=norm, cmap="jet"), ax=axes[0], cax=cax_1)
cb_1.set_label('Meters')
viz_x, viz_y = m2(traj_coord[:,0], traj_coord[:,1])
grid_x = int((max(viz_x) - min(viz_x)) / grid_step)
grid_y = int((max(viz_y) - min(viz_y)) / grid_step)
hb = m2.hexbin(viz_x, viz_y, ax=axes[1], gridsize=(grid_x, grid_y), bins='log', cmap='Reds')
divider_2 = make_axes_locatable(axes[1])
cax_2 = divider_2.append_axes("right", size="5%", pad=0.05)
cb = fig.colorbar(hb, ax=axes[1], cax=cax_2)
cb.set_label('log(N)')
axes[0].set_title("Height of Traj")
axes[1].set_title("Number of Trajs per bin")
return m1, m2
def run_and_visualize_hysplit(basename, years, months, days, hours, altitudes, location, runtime):
if isinstance(location, list):
for idx, loc in enumerate(tqdm(location)):
basename_2 = basename + "_" + str(idx)
filenames = glob(f"data/step_3_data_output/traj/{basename_2}*")
[os.remove(file) for file in filenames]
run_hysplit(basename_2, years, months, days, hours, altitudes, loc, runtime)
else:
filenames = glob(f"data/step_3_data_output/traj/{basename}*")
[os.remove(file) for file in filenames]
run_hysplit(basename, years, months, days, hours, altitudes, location, runtime)
visualize_trajectories(basename)
color_dict = {5.0 : 'blue',
25.0 : 'orange',
50.0 : 'red'}
%%capture
basemap=viz_init()
stazioni = csv_to_gpd("data/step_1_data_output/stazioni_enriched", crs='3031')
stazioni_x, stazioni_y = basemap(stazioni.to_crs('EPSG:4326').longitude.values, stazioni.to_crs('EPSG:4326').latitude.values)
rocks = csv_to_gpd(f"data/step_1_data_output/rocks", crs='3031')
clusters = csv_to_gpd(f"data/step_1_data_output/clusters", crs='3031')
C:\Users\mario\Desktop\Code\tesi\Tools.py:365: DtypeWarning: Columns (7,11,12,27) have mixed types. Specify dtype option on import or set low_memory=False.
df = pd.read_csv(f"{df_name}.csv")
centers_of_mass = []
for i in tqdm(range(len(clusters))):
clus_id = rocks[rocks.cluster_id == i]
clus_id = clus_id.set_crs('EPSG:3031', allow_override=True)
clus_id = clus_id.to_crs("EPSG:4326")
areas = clus_id.Shape_Area
centroids = clus_id.to_crs('EPSG:3031').geometry.centroid
mass_x = sum((centroids.x * areas)) / sum(areas)
mass_y = sum((centroids.y * areas)) / sum(areas)
mass_epsg_4326 = gpd.points_from_xy(x=[mass_x], y=[mass_y], crs="EPSG:3031").to_crs('EPSG:4326')[0].coords.xy
mass_x = mass_epsg_4326[0][0]
mass_y = mass_epsg_4326[1][0]
centers_of_mass.append((mass_x, mass_y))
centers_of_mass = pd.DataFrame(centers_of_mass, columns=['x', 'y'])
centers_of_mass = gpd.GeoDataFrame(centers_of_mass, geometry=gpd.points_from_xy(centers_of_mass.x, centers_of_mass.y), crs='EPSG:4326')
centers_3031 = centers_of_mass.to_crs("EPSG:3031")
clust_x, clust_y = basemap(centers_of_mass.geometry.x, centers_of_mass.geometry.y)
for each station and for each month take all the calculated trajectories and plot them
plot a hexbin map to get a heatmap
for each trajectory calculate if it pass at a distance lower then a given threshold to each of the clusters centroids (contribution of cluster to station is plotted by size of cluster centroids, in red)
store this information
Year: 2021
Months: January, July
Days: All
Hours: 0, 6, 12, 18
Altitudes: 5, 25, 50
Locations: each station
runtime: 10 days back-tracjetory
grid_step = 200000
distance_threshold = 100000
height_threshold = 100
max_linesize = 10
clust_influence = []
staz_influence = []
lines_color = np.array([153/255, 0, 204/255])
stat_nums = list(range(len(stazioni)))
clust_nums = list(range(len(clusters)))
months = [1, 7]
months_dict = {1: "Gennaio", 7: "Luglio"}
def logistic_score(x, L, k, x0):
y = logistic_function(x, L, k, x0)
norm_y = (y-min_score) / (max_score - min_score)
### Add not significant delta to avoid return -0 when x is closer to 0
return np.around(norm_y + 0.000000001, 3)
def logistic_function(x, L, k, x0):
denominator = 1 + np.power(np.e, (k*-1)*(x-x0))
y = L / denominator
return y
def get_near_score(distance, height):
distance_score = logistic_score(distance / distance_threshold, L, k, x0)
height_score = logistic_score(height / height_threshold, L, k, x0)
mean = np.mean([distance_score, height_score])
return mean
L=1
k=-4
x0=0.5
if k > 0:
min_score = logistic_function(0, L, k, x0)
max_score = logistic_function(1, L, k, x0)
else:
max_score = logistic_function(0, L, k, x0)
min_score = logistic_function(1, L, k, x0)
for stat_num in tqdm(stat_nums):
for month in months:
traj_id = f'station_{stat_num}_month_{month}'
traj_data = traj_data_from_path(traj_id)
near_clusters = np.zeros(len(clusters))
for traj in traj_data['group']:
traj = traj.data.set_crs("EPSG:4326").to_crs("EPSG: 3031")
for idx, cluster_center in enumerate(centers_3031.geometry):
for traj_point in traj.geometry:
distance = traj_point.distance(cluster_center)
height = traj_point.z
if distance < distance_threshold and height < height_threshold:
near_clusters[idx] += get_near_score(distance, height)
clust_influence.append(near_clusters)
for clust_num in tqdm(clust_nums):
for month in months:
traj_id = f'cluster_{clust_num}_month_{month}'
traj_data = traj_data_from_path(traj_id)
near_stations = np.zeros(len(stazioni))
for traj in traj_data['group']:
traj = traj.data.set_crs("EPSG:4326").to_crs("EPSG: 3031")
for idx, station_center in enumerate(stazioni.geometry):
for traj_point in traj.geometry:
distance = traj_point.distance(station_center)
height = traj_point.z
if distance < distance_threshold and height < height_threshold:
near_stations[idx] += get_near_score(distance, height)
staz_influence.append(near_stations)
np.save('data/step_3_data_output/clust_influence_score_corr', clust_influence)
np.save('data/step_3_data_output/staz_influence_score_corr', staz_influence)
clust_influence = np.load('data/step_3_data_output/clust_influence_score_corr.npy')
staz_influence = np.load('data/step_3_data_output/staz_influence_score_corr.npy')
clust_influence = np.array(clust_influence)
staz_influence = np.array(staz_influence)
jans = [i for i in range(len(stazioni)*2) if i % 2 == 0]
juls = [i for i in range(len(stazioni)*2) if i % 2 == 1]
jans_clust_influence = clust_influence[jans, :]
juls_clust_influence = clust_influence[juls, :]
clusters['jans_influences'] = np.sum(jans_clust_influence, axis=0)
clusters['juls_influences'] = np.sum(juls_clust_influence, axis=0)
jans = [i for i in range(len(clusters)*2) if i % 2 == 0]
juls = [i for i in range(len(clusters)*2) if i % 2 == 1]
jans_staz_influence = staz_influence[jans, :]
juls_staz_influence = staz_influence[juls, :]
stazioni['jans_influences'] = np.sum(jans_staz_influence, axis=0)
stazioni['juls_influences'] = np.sum(juls_staz_influence, axis=0)
for stat_num in tqdm(stat_nums):
for month in months:
traj_id = f'station_{stat_num}_month_{month}'
traj_data = traj_data_from_path(traj_id)
fig, axes = plt.subplots(figsize=(30, 10), nrows=1, ncols=3)
m1, m2 = visualize_trajectories(axes, traj_data['group'], traj_data['coords'], direction='backward')
clust_x_proj, clust_y_proj = m2(clust_x, clust_y)
if month==1:
m2.scatter(clust_x_proj, clust_y_proj, color='blue', zorder=5, s=jans_clust_influence[stat_num], ax=axes[1])
else:
m2.scatter(clust_x_proj, clust_y_proj, color='blue', zorder=5, s=juls_clust_influence[stat_num], ax=axes[1])
m3 = viz_init(axes[2])
m1.scatter(stazioni_x[stat_num], stazioni_y[stat_num], color='black', zorder=5, s=50, ax=axes[0])
m2.scatter(stazioni_x[stat_num], stazioni_y[stat_num], color='white', zorder=5, s=50, ax=axes[1])
m3.scatter(stazioni_x[stat_num], stazioni_y[stat_num], color='black', zorder=5, s=50, ax=axes[2])
influence_index = stat_num*len(months) + months.index(month)
for j in range(len(centers_of_mass)):
m3.scatter(clust_x_proj[j], clust_y_proj[j], color='red', zorder=5, s=30, ax=axes[2])
point_clust = centers_of_mass.iloc[j].geometry
point_staz = stazioni.to_crs("EPSG:4326").iloc[stat_num].geometry
traj_clust_to_staz = shapely.geometry.linestring.LineString([point_clust, point_staz])
if month == 1:
linewidth = (jans_clust_influence[stat_num, j] / np.max(clust_influence)) * max_linesize
else:
linewidth = (juls_clust_influence[stat_num, j] / np.max(clust_influence)) * max_linesize
m3.plot(*traj_clust_to_staz.xy, c=lines_color, latlon=True, zorder=2, linewidth=linewidth, ax=axes[2])
fig.suptitle(f'Stazione numero: {stat_num} Mese: {months_dict[month]}', fontsize=16)
axes[0].set_title(f"Traiettorie")
axes[1].set_title(f"Clusters coinvolti")
plt.show()
for clust_num in clust_nums:
for month in months:
traj_id = f'cluster_{clust_num}_month_{month}'
traj_data = traj_data_from_path(traj_id)
fig, axes = plt.subplots(figsize=(30, 10), nrows=1, ncols=3)
m1, m2 = visualize_trajectories(axes, traj_data['group'], traj_data['coords'])
if month==1:
m2.scatter(stazioni_x, stazioni_y, color='blue', zorder=5, s=jans_staz_influence[clust_num], ax=axes[1])
else:
m2.scatter(stazioni_x, stazioni_y, color='blue', zorder=5, s=juls_staz_influence[clust_num], ax=axes[1])
m3 = viz_init(axes[2])
m1.scatter(clust_x[clust_num], clust_y[clust_num], color='black', zorder=5, s=50, ax=axes[0])
m2.scatter(clust_x[clust_num], clust_y[clust_num], color='white', zorder=5, s=50, ax=axes[1])
m3.scatter(clust_x[clust_num], clust_y[clust_num], color='black', zorder=5, s=50, ax=axes[2])
influence_index = clust_num*len(months) + months.index(month)
for j in range(len(stazioni)):
m3.scatter(stazioni_x[j], stazioni_y[j], color='red', zorder=5, s=30, ax=axes[2])
point_clust = centers_of_mass.iloc[clust_num].geometry
point_staz = stazioni.to_crs("EPSG:4326").iloc[j].geometry
traj_clust_to_staz = shapely.geometry.linestring.LineString([point_clust, point_staz])
if month == 1:
linewidth = (jans_staz_influence[clust_num, j] / np.max(staz_influence)) * max_linesize
else:
linewidth = (juls_staz_influence[clust_num, j] / np.max(staz_influence)) * max_linesize
m3.plot(*traj_clust_to_staz.xy, c=lines_color, latlon=True, zorder=2, linewidth=linewidth, ax=axes[2])
fig.suptitle(f'Cluster numero: {clust_num} Mese: {months_dict[month]}', fontsize=16)
axes[0].set_title(f"Traiettorie")
axes[1].set_title(f"Stazioni coinvolti")
plt.show()
import pickle
import seaborn as sns
max_linesize = 5
lines_color = np.array([153/255, 0, 204/255])
for i in tqdm(range(len(stazioni))):
fig, axes = plt.subplots(figsize=(20, 10), nrows=1, ncols=2)
fig.suptitle(f'Stazione numero: {i}', fontsize=16)
m1 = viz_init(axes=axes[0])
m2 = viz_init(axes=axes[1])
m1.scatter(stazioni_x[i], stazioni_y[i], color='blue', zorder=5, s=60, ax=axes[0])
m2.scatter(stazioni_x[i], stazioni_y[i], color='blue', zorder=5, s=60, ax=axes[1])
with open(f"data/step_3_data_output/clusters/station_{i}_month_1.pk", "rb") as f:
cluster_1 = pickle.load(f)
with open(f"data/step_3_data_output/clusters/station_{i}_month_7.pk", "rb") as f:
cluster_7 = pickle.load(f)
max_linewidth = 10
n_1 = cluster_1.trajcount
n_7 = cluster_7.trajcount
palette_1 = sns.color_palette('pastel', cluster_1.clustercount)
palette_7 = sns.color_palette('pastel', cluster_7.clustercount)
for idx, cluster_info in enumerate(cluster_1.clusters):
cluster_size = (cluster_info.trajcount / n) * max_linewidth
m1.plot(*cluster_info.path.xy, c=palette_1[idx], latlon=True, zorder=1, linewidth=cluster_size, ax=axes[0])
for idx, cluster_info in enumerate(cluster_7.clusters):
cluster_size = (cluster_info.trajcount / n) * max_linewidth
m2.plot(*cluster_info.path.xy, c=palette_7[idx], latlon=True, zorder=1, linewidth=cluster_size, ax=axes[1])
for j in range(len(centers_of_mass)):
m1.scatter(clust_x[j], clust_y[j], color='red', zorder=1, s=30, ax=axes[0])
m2.scatter(clust_x[j], clust_y[j], color='red', zorder=1, s=30, ax=axes[1])
point_clust = centers_of_mass.iloc[j].geometry
point_staz = stazioni.to_crs("EPSG:4326").iloc[i].geometry
traj_clust_to_staz = shapely.geometry.linestring.LineString([point_clust, point_staz])
linewidth_jan = (jans_clust_influence[i, j] / np.max(clust_influence)) * max_linesize
linewidth_jul = (juls_clust_influence[i, j] / np.max(clust_influence)) * max_linesize
m1.plot(*traj_clust_to_staz.xy, c=lines_color, latlon=True, zorder=1, linewidth=linewidth_jan, ax=axes[0])
m2.plot(*traj_clust_to_staz.xy, c=lines_color, latlon=True, zorder=1, linewidth=linewidth_jul, ax=axes[1])
axes[0].set_title(f"Gennaio")
axes[1].set_title(f"Luglio")
plt.show()
import shutil
winter_all = glob("data/step_3_data_output/traj/*_*_month_7_*")
summer_all = glob("data/step_3_data_output/traj/*_*_month_1_*")
for file in tqdm(winter_all):
split = file.split("\\")
dest = f"{split[0]}/winter_all/{split[1]}"
shutil.copy(file, dest)
for file in tqdm(summer_all):
split = file.split("\\")
dest = f"{split[0]}/summer_all/{split[1]}"
shutil.copy(file, dest)
filenames_staz = glob(f"data/step_3_data_output/traj/station_*_month_*")
filenames_clust = glob(f"data/step_3_data_output/traj/cluster_*_month_*")
traj_group = pysplit.make_trajectorygroup(filenames_staz + filenames_clust)
traj_coord = np.concatenate([np.array(list(zip(x.path.xy[0], x.path.xy[1]))) for x in traj_group])
fig, axes = plt.subplots(figsize=(20, 10), nrows=1, ncols=2)
m1, m2 = visualize_trajectories(axes, traj_group, traj_coord, direction='backward', lw=0.05)
clust_x_proj, clust_y_proj = m2(clust_x, clust_y)
if month==1:
m2.scatter(clust_x_proj, clust_y_proj, color='blue', zorder=5, s=jans_clust_influence[stat_num], ax=axes[1])
else:
m2.scatter(clust_x_proj, clust_y_proj, color='blue', zorder=5, s=juls_clust_influence[stat_num], ax=axes[1])
m1.scatter(stazioni_x, stazioni_y, color='black', zorder=5, s=25, ax=axes[0])
m2.scatter(stazioni_x, stazioni_y, color='white', zorder=5, s=25, ax=axes[1])
fig.suptitle(f'Circolazione totale inverno', fontsize=16)
plt.show()